# delimit;
* From here on every statement ends with a semicolon instead of a line break;
set more off;

* Drop everything in memory and close any log that is still open;
* cap suppresses the error raised when no log is open;
clear all;
cap log close;

* Space-separated list of outcome variables;
* Not referenced in the visible part of this file - presumably used further down, TODO confirm;
local cavars = "balnewauto lbalnewauto numautoopen lbal_bcret_tot lbal_bankc_tot lbal_auto_tot lbal_home_tot lbal_consf_tot lbal_other2_tot bal_bcret_tot bal_bankc_tot bal_auto_tot bal_home_tot_cond bal_consf_tot bal_other2_tot riskscore gt1_all_90 fstart_cond moveoutblock moveouttract moveoutcounty moveoutstate moveblock moveblock3yr movecounty3yr movetract movecounty movestate numnewacct inq3m";

* Space-separated list of the move indicators, plain and 3yr versions;
* Not referenced in the visible part of this file - TODO confirm where it is used;
local movevars = "moveblock movetract movecounty movestate moveblock3yr movetract3yr movecounty3yr movestate3yr";

* Text log of this run, overwritten each time the file is executed;
log using ../logs/create_tornado_panel_20220330.txt, text replace;


* Create Census block variables;
* Reads the raw Census 2000 block extract and saves four ratio variables keyed by block2000;
use ../raw_data/census2000_block_variables_us.dta, clear;

* numerator of f65pblock: the twelve fym cells, held in a macro so the ratio below stays readable;
local cells65 "fym018 + fym019 + fym020 + fym021 + fym022 + fym023 + fym041 + fym042 + fym043 + fym044 + fym045 + fym046";

* each ratio is missing when its denominator is zero or missing;
generate fooblock   = fwa001 / (fwa001 + fwa002);
generate faablock   = fye002 / fxs001;
generate fhispblock = fxz001 / (fxz001 + fxz002);
generate f65pblock  = (`cells65') / fxs001;

keep block2000 fooblock faablock fhispblock f65pblock;
summarize;
compress;

* sorted on the merge key used later in this file;
sort block2000;
save ../raw_data/census2000_block_vars.dta, replace;



* Create dataset of quarterly new auto loan data in tradelines based on account opendate;
* Output has one row per cid and opening quarter holding summed balance, summed hicredit;
* (renamed balnewauto) and the number of loans opened (numautoopen);
use ../raw_data/ccp_auto_tradeline_near.dta, clear;
* opendate is read as a year-month-day string and turned into a daily date;
gen sopendate = date(opendate, "YMD");
format sopendate %td;

* quarter of the tradeline record itself;
gen qdate = yq(year(qtr), quarter(qtr));
format qdate %tq;

* Semi-annual data during our sample period end of Q2 and Q4;
tab qdate;

* quarter in which the loan was opened;
gen qopendate = yq(year(sopendate), quarter(sopendate));
format qopendate %tq;

* this appears continuous, not affected by only having semi-annual observations;
tab qopendate;


* There are duplicated tradeline IDs;
* We will assume that a record is a duplicate if it has the same open date and opening balance (hicredit);

* diagnostic: records sharing cid, tlid and record quarter;
gsort cid tlid qdate sopendate hicredit;
count if cid==cid[_n-1] & tlid==tlid[_n-1] & qdate==qdate[_n-1];

* There are relatively few instances (3,759 out of 2.7 million records) of duplicates with same open date and hicredit;
* We will count those as true duplicates and drop them;
count if cid==cid[_n-1] & tlid==tlid[_n-1] & qdate==qdate[_n-1] & sopendate==sopendate[_n-1] & hicredit==hicredit[_n-1];
drop if cid==cid[_n-1] & tlid==tlid[_n-1] & qdate==qdate[_n-1] & sopendate==sopendate[_n-1] & hicredit==hicredit[_n-1];

* Next we need limit the sample to one observation per loan;
* a loan is identified here by cid, open date and hicredit, tlid is not part of the key;
* qdate is last in the sort so the row kept is the earliest record quarter of each loan;
gsort cid sopendate hicredit qdate;
count if cid==cid[_n-1] & sopendate==sopendate[_n-1] & hicredit==hicredit[_n-1];
drop if cid==cid[_n-1] & sopendate==sopendate[_n-1] & hicredit==hicredit[_n-1];
count;

sum balance hicredit, d;

* one per loan, so the sum in the collapse below counts loans opened in the quarter;
gen numautoopen = 1;

collapse (sum) balance numautoopen hicredit, by(cid qopendate);
sum balance numautoopen, d;
* 99% of observations have only 1 new auto loan per quarter;
rename hicredit balnewauto;
* the opening quarter becomes qdate, the key used when this file is merged onto the panel;
rename qopendate qdate;

compress;
sort cid qdate;
save ../tmp_data/ccp_new_auto_loan_data_near.dta, replace;




* County-level student loan extract reduced to one balance per person and quarter;
use ../raw_data/ccp_primary_cid_100pct_all_county_studentloan_1999Q1-2017Q4.dta, clear;
tabulate qtr;

* from FRBNY fix for student loan balances;
* half of the sum of attributes 326 and 330 plus attribute 334;
generate bal_stdln_tot = 0.5 * (cust_attr326+cust_attr330) + cust_attr334;

* keep a single row per cid and qtr, then discard quarters before 2003;
bysort cid qtr: keep if _n == 1;
drop if year(qtr) < 2003;

keep cid qtr bal_stdln_tot;
sort cid qtr;
compress;
save ../tmp_data/ccp_primary_cid_100pct_all_county_studentloan.dta, replace;



* Corrections to the assistance flags in the hit-block sample;
* All three sets of corrections are applied in one pass and written back over the raw file;
* The ids touched by each set do not overlap, so the result does not depend on their order;
use ../raw_data/hit_blocks_intensity_sample_v2.dta, clear;

* Fix for misclassified tornadoes Kyle Musser found 7/18/2019 PDD in MO not in IL;
foreach flag of varlist individual_assistance public_assistance {;
  replace `flag' = 1 if (id == 28 & state_fips == "29");
  replace `flag' = 0 if (id == 28 & state_fips == "17");
};

* Fix for misclassified tornadoes Dan found 7/16/2019;
foreach flag of varlist individual_assistance public_assistance {;
  replace `flag' = 1 if (id == 25 | id == 26);
};

* Fix for misclassified tornadoes Justin and Simin found 6/27/2018;
* id 87 ends with individual_assistance 0 and public_assistance 1, exactly as before;
replace individual_assistance = 1 if id == 74;
replace public_assistance = 1 if id == 74;
replace individual_assistance = 0 if id == 87;
replace public_assistance = 1 if id == 87;

save ../raw_data/hit_blocks_intensity_sample_v2.dta, replace;


* Student loan extract reduced to one balance per person and quarter;
* The saved file is merged onto the CCP panel by cid and qtr later in this file;
use ../raw_data/ccp_primary_cid_100pct_studentloan_1999Q1-2017Q4_v2.dta, clear;
tabulate qtr;

* from FRBNY fix for student loan balances;
* half of the sum of attributes 326 and 330 plus attribute 334;
generate bal_stdln_tot = 0.5 * (cust_attr326+cust_attr330) + cust_attr334;

* keep a single row per cid and qtr, then discard quarters before 2003;
bysort cid qtr: keep if _n == 1;
drop if year(qtr) < 2003;

keep cid qtr bal_stdln_tot;
sort cid qtr;
compress;
save ../tmp_data/ccp_primary_cid_100pct_studentloan.dta, replace;


* Bankruptcy extract reduced to one row per cid and qtr so it can be merged 1:1 later in this file;
use ../raw_data/ccp_primary_cid_100pct_bankruptcy-7-12_1999Q1-2017Q4_v2.dta, clear;
bysort cid qtr: keep if _n == 1;
sort cid qtr;
compress;
save ../tmp_data/ccp_primary_cid_100pct_bankruptcy-7-12.dta, replace;




* Block-level tornado hit file: one row per tornado id and block with intensity group dummies;
* A second file with the distinct id and state_fips pairs is saved at the end;
use ../raw_data/hit_blocks_intensity_sample_v2.dta, clear;

tab id;

rename fips_num block_str;
drop if block_str == "";

* keep one row per tornado and block. The sort key does not determine which duplicate survives;
sort id block_str;
drop if id == id[_n-1] & block_str == block_str[_n-1];

* three intensity bins of width two;
* each dummy is 0, not missing, when weighted_intensity is missing because Stata ranks missing above every number;
gen ef_group1= (weighted_intensity>=0 & weighted_intensity<2);
gen ef_group2= (weighted_intensity>=2 & weighted_intensity<4);
gen ef_group3= (weighted_intensity>=4 & weighted_intensity<=6);

* alternative split into two bins of width three;
* NOTE(review): ef_group3 includes a value of exactly 6 while ef_group5 excludes it - confirm this is intended;
gen ef_group4= (weighted_intensity>=0 & weighted_intensity<3);
gen ef_group5= (weighted_intensity>=3 & weighted_intensity<6);

rename low_damage dam_weight_g1;
rename med_damage dam_weight_g2;
rename high_damage dam_weight_g3;

keep id block_str dam_* ef_group* weighted_intensity area_hit flag_area_hit;
sort id block_str;
compress;

save ../tmp_data/tornado_hit_damage_intensity.dta, replace;

* the first two characters of block_str are used as the state fips code;
gen state_fips = substr(block_str, 1, 2);

* list of distinct tornado id and state pairs, used later to restrict buffer blocks to those pairs;
sort id state_fips;
drop if (id == id[_n-1] & state_fips == state_fips[_n-1]);
keep id state_fips;
save ../tmp_data/tornado_hit_damage_intensity_ids.dta, replace;



* Make the buffer block lists around the tornado paths;
* One file is written per buffer distance:;
*   buffer_dist 05_15 to buffer_blocks_within_05_15.dta  (0.5-1.5 mile buffer);
*   buffer_dist 5     to buffer_blocks_within_05.dta     (within 0.5 miles);
*   buffer_dist 1     to buffer_blocks_within_1.dta      (within 1 mile);
*   buffer_dist 2     to buffer_blocks_within_2.dta      (within 2 miles);
* Each file holds id, block_str and area_hit with one row per tornado and block;
* The four passes used to be four copy-pasted sections. They also generated sdate and year,;
* which were never saved, so those two variables are no longer created;

local bufdists "05_15 5 1 2";
local bufnames "05_15 05 1 2";
local nbuf : word count `bufdists';

forvalues b = 1/`nbuf' {;

  * value of buffer_dist to keep and the matching output file suffix;
  local dist : word `b' of `bufdists';
  local name : word `b' of `bufnames';

  use ../raw_data/tornado_blocks_buffersF4F5_v2.dta, clear;

  keep if buffer_dist == "`dist'";
  rename fips_num block_str;

  * report and remove repeated tornado and block pairs;
  sort id block_str;
  count if id==id[_n-1] & block_str==block_str[_n-1];
  drop if id==id[_n-1] & block_str==block_str[_n-1];
  count;

  rename ratio_hit area_hit;

  * DAH - 8/24/2021;
  * rule for block being in buffer or treatment is if it is more than 50% in;
  * only the 1 mile file is filtered at this point. The 2 mile file is saved unfiltered and;
  * the same rule is applied to it when the 1 - 2 mile ring is built;
  if "`dist'" == "1" {;
    keep if area_hit > 0.5;
  };

  keep id block_str area_hit;
  sort id block_str;
  desc;
  sum;
  tab id;
  save ../tmp_data/buffer_blocks_within_`name'.dta, replace;

};




* make buffer blocks 1 - 2 miles;
* clear is given explicitly so that this section also runs when the data in memory have unsaved changes;
use ../tmp_data/buffer_blocks_within_2.dta, clear;

* DAH - 8/24/2021;
* rule for block being in buffer or treatment is if it is more than 50% in;
keep if area_hit > 0.5;

* keep only blocks that are not in the saved 1 mile buffer file, which leaves the 1 - 2 mile ring;
* keep(master) nogenerate is the same as keeping _merge equal to 1 and then dropping _merge;
merge 1:1 id block_str using ../tmp_data/buffer_blocks_within_1.dta, keep(master) nogenerate;

sort id block_str;
desc;
sum;
tab id;
save ../tmp_data/buffer_blocks_within_1_2.dta, replace;



* append hit and buffer blocks for maps;
* clear is given explicitly so that this section also runs when the data in memory have unsaved changes;
use ../tmp_data/tornado_hit_damage_intensity.dta, clear;
gen treated = 1;
append using ../tmp_data/buffer_blocks_within_05_15.dta;
* rows that came from the buffer file have no treated value yet;
replace treated = 0 if treated == .;
tab id treated;

keep id block_str area_hit treated;

* include blocks 50%, 75% and 99% or more in tornado path or buffer area;
* the thresholds are applied one after another to the same data, so each exported file is a;
* subset of the one before it;
local cutoffs "0.5 0.75 0.99";
local labels "50 75 99";

forvalues m = 1/3 {;
  local cut : word `m' of `cutoffs';
  local lab : word `m' of `labels';

  keep if area_hit >= `cut';
  tab id treated;
  export dbase using ../tmp_data/tornado_blocks_for_map_`lab'pct.dbf, replace;
};

clear;



* make tornado dataset with only variables that do not vary by block;
* Result: one row per tornado id and state with the assistance flags, sdate and the tornado quarter;

use ../raw_data/hit_blocks_intensity_sample_v2.dta;

* attach the tornado dates. Rows that exist only in the dates file are not kept;
sort id;
merge m:1 id using ../raw_data/tornado_dates.dta, keep(master match) nogenerate;

tab id;

rename fips_num block_str;
drop if block_str == "";
sort block_str;

* quarter of the tornado. Rows without a usable date are removed;
gen qdate = yq(year(sdate), quarter(sdate));
format qdate %tq;
drop if missing(qdate);

* collapse to a single row per tornado and state;
bysort id state_fips: keep if _n == 1;

* keep only tornado info that does not vary by block for later merge;
keep id state_fips public_assistance individual_assistance sdate qdate;
sort id state_fips;
compress;
count;
save ../tmp_data/tornado_intensity_near.dta, replace;



* Use donut control group that excludes closest 0.5 miles;
* Stack the treated blocks on the 0.5-1.5 mile buffer blocks and flag which is which;
use ../tmp_data/tornado_hit_damage_intensity.dta, clear;
gen treated = 1;
append using ../tmp_data/buffer_blocks_within_05_15.dta;
replace treated = 0 if treated == .;
tab id treated;

drop dam_weight_g1 dam_weight_g2 dam_weight_g3;

* rows appended from the buffer file get zero in every intensity variable;
foreach v of varlist ef_group1 ef_group2 ef_group3 ef_group4 ef_group5 weighted_intensity {;
  replace `v' = 0 if treated == 0;
};

* diagnostics: blocks listed more than once for a tornado, overall and with the same treated value;
sort id block_str treated;
count if id==id[_n-1] & block_str==block_str[_n-1];
count if id==id[_n-1] & block_str==block_str[_n-1] & treated==treated[_n-1];

tab treated;

* 1698 cases of blocks being both treated and in the buffer zone;
* only 19 cases when limited to g1, 2 for g2, and 0 for g3;
* we will drop the blocks from the buffer sample_v2 in these cases;
* sorting treated in descending order puts the treated copy first, so the buffer copy is the one dropped;
gsort id block_str -treated;
count if id==id[_n-1] & block_str==block_str[_n-1];
tab id treated if id==id[_n-1] & block_str==block_str[_n-1];

drop if id==id[_n-1] & block_str==block_str[_n-1];
count if id==id[_n-1] & block_str==block_str[_n-1];

tab treated;

* restrict to tornado and state pairs that appear in the hit file;
gen state_fips = substr(block_str, 1, 2);

sort id state_fips;
merge m:1 id state_fips using ../tmp_data/tornado_hit_damage_intensity_ids.dta, keep(match) nogenerate;

drop state_fips;

tab treated;
sort id block_str;
compress;
save ../tmp_data/tornado_damage_intensity_blocks_05_15.dta, replace;




* create alternative control group in 1-2 miles around tornado path;
* Use donut control group that excludes closest 1 miles;
* Stack the treated blocks on the 1 - 2 mile ring blocks and flag which is which;
use ../tmp_data/tornado_hit_damage_intensity.dta, clear;
gen treated = 1;
append using ../tmp_data/buffer_blocks_within_1_2.dta;
replace treated = 0 if treated == .;
tab id treated;

drop dam_weight_g1 dam_weight_g2 dam_weight_g3;

* rows appended from the buffer file get zero in every intensity variable;
foreach v of varlist ef_group1 ef_group2 ef_group3 ef_group4 ef_group5 weighted_intensity {;
  replace `v' = 0 if treated == 0;
};

* diagnostics: blocks listed more than once for a tornado, overall and with the same treated value;
sort id block_str treated;
count if id==id[_n-1] & block_str==block_str[_n-1];
count if id==id[_n-1] & block_str==block_str[_n-1] & treated==treated[_n-1];

tab treated;

* a block can be both treated and in the buffer zone. The buffer copy is dropped in these cases;
* sorting treated in descending order puts the treated copy first;
* the case counts for this sample are the ones printed by the two count commands above;
gsort id block_str -treated;
drop if id==id[_n-1] & block_str==block_str[_n-1];
count if id==id[_n-1] & block_str==block_str[_n-1];

tab treated;

* restrict to tornado and state pairs that appear in the hit file;
gen state_fips = substr(block_str, 1, 2);

sort id state_fips;
merge m:1 id state_fips using ../tmp_data/tornado_hit_damage_intensity_ids.dta, keep(match) nogenerate;

drop state_fips;

tab treated;
sort id block_str;
compress;
save ../tmp_data/tornado_damage_intensity_blocks_1_2.dta, replace;

    


* Prepare 4 damage heterogeneity treatment control files to merge onto ccp;
* new sample of blocks from Justin 9/22/2018;
* Three files are written for the 0.5-1.5 mile control sample:;
*   1. all tornado and block rows with tornado-level information;
*   2. one row per block and quarter, used for the first merge onto the CCP data;
*   3. file 2 expanded to 25 quarters around the tornado;

use ../tmp_data/tornado_damage_intensity_blocks_05_15.dta, clear;

sort id block_str;
count if id==id[_n-1] & block_str==block_str[_n-1];

gen state_fips = substr(block_str, 1, 2);

* merge in tornado intensity, keeping matched rows only;
sort id state_fips;
merge m:1 id state_fips using ../tmp_data/tornado_intensity_near.dta, keep(match) nogenerate;

compress;

* diagnostic: repeated tornado, block and quarter combinations;
sort id block_str qdate;
count if id==id[_n-1] & block_str==block_str[_n-1] & qdate==qdate[_n-1];
tab id if id==id[_n-1] & block_str==block_str[_n-1] & qdate==qdate[_n-1];

desc;

* Use quarter before tornado for CCP;
replace qdate = qdate - 1;

sort id block_str;
desc;
sum;
tab id;
save ../tmp_data/hit_and_buffer_blocks_within_hetall_near_05_15.dta, replace;


* one row per block and quarter. With id sorted in descending order the highest id is the row kept;
keep block_str qdate id treated ef_group* weighted_intensity area_hit;
gsort block_str qdate -id;

count if block_str==block_str[_n-1] & qdate==qdate[_n-1];
drop  if block_str==block_str[_n-1] & qdate==qdate[_n-1];
desc;
sum;
tab id;
save ../tmp_data/hit_and_buffer_blocks_within_hetall_for_first_merge_near_05_15.dta, replace;


* Create tornado file used to create the migration dataset that keeps people that have ever lived;
* in the tornado and control areas in the pre-period;

* 25 copies of every row, numbered 1 to 25 within tornado, block and quarter;
expand 25;
bysort id block_str qdate: gen obsnum = _n;
tab obsnum;

* qdate is the quarter before the tornado, so adding obsnum - 12 spans 12 quarters before;
* through 12 quarters after the tornado quarter;
replace qdate = qdate + obsnum - 12;
drop obsnum;

* diagnostic: blocks that now appear more than once in the same quarter;
sort block_str qdate;
count if block_str==block_str[_n-1] & qdate==qdate[_n-1];

sort id block_str qdate;
compress;

save ../tmp_data/hit_and_buffer_blocks_within_hetall_for_first_merge_near_25q_05_15.dta, replace;



* create the similar 3 files for the 1-2 mile control sample;
*   1. all tornado and block rows with tornado-level information;
*   2. one row per block and quarter, used for the first merge onto the CCP data;
*   3. file 2 expanded to 25 quarters around the tornado;
use ../tmp_data/tornado_damage_intensity_blocks_1_2.dta, clear;

sort id block_str;
count if id==id[_n-1] & block_str==block_str[_n-1];

gen state_fips = substr(block_str, 1, 2);

* merge in tornado intensity, keeping matched rows only;
sort id state_fips;
merge m:1 id state_fips using ../tmp_data/tornado_intensity_near.dta, keep(match) nogenerate;

compress;

* diagnostic: repeated tornado, block and quarter combinations;
sort id block_str qdate;
count if id==id[_n-1] & block_str==block_str[_n-1] & qdate==qdate[_n-1];
tab id if id==id[_n-1] & block_str==block_str[_n-1] & qdate==qdate[_n-1];

desc;

* Use quarter before tornado for CCP;
replace qdate = qdate - 1;

sort id block_str;
desc;
sum;
tab id;
save ../tmp_data/hit_and_buffer_blocks_within_hetall_near_1_2.dta, replace;

* one row per block and quarter. With id sorted in descending order the highest id is the row kept;
keep block_str qdate id treated ef_group* weighted_intensity area_hit;
gsort block_str qdate -id;

count if block_str==block_str[_n-1] & qdate==qdate[_n-1];
drop  if block_str==block_str[_n-1] & qdate==qdate[_n-1];
desc;
sum;
tab id;
save ../tmp_data/hit_and_buffer_blocks_within_hetall_for_first_merge_near_1_2.dta, replace;


* Create tornado file used to create the migration dataset that keeps people that have ever lived;
* in the tornado and control areas in the pre-period;

* 25 copies of every row, numbered 1 to 25 within tornado, block and quarter;
expand 25;
bysort id block_str qdate: gen obsnum = _n;
tab obsnum;

* qdate is the quarter before the tornado, so adding obsnum - 12 spans 12 quarters before;
* through 12 quarters after the tornado quarter;
replace qdate = qdate + obsnum - 12;
drop obsnum;

* diagnostic: blocks that now appear more than once in the same quarter;
sort block_str qdate;
count if block_str==block_str[_n-1] & qdate==qdate[_n-1];

sort id block_str qdate;
compress;

save ../tmp_data/hit_and_buffer_blocks_within_hetall_for_first_merge_near_25q_1_2.dta, replace;




/*
use ../raw_data/ccp_primary_cid_100pct.dta;
keep cid state county_code census_tract census_block;

sort state;
merge m:1 state using ../raw_data/state_fips.dta;
tab _merge;
keep if _merge == 3;
drop _merge;

drop state;
gen block_str = string(state_code, "%02.0f") + county_code + census_tract + census_block;

drop state_code county_code census_tract census_block;

sort block_str;
merge m:1 block_str using ../tmp_data/tornado_block_list_for_ccp_v2.dta;
keep if _merge == 3;
drop _merge;

keep cid;
sort cid;
drop if cid == cid[_n-1];
compress;
save ../tmp_data/ccp_primary_cid_ever_in_tornado_block_v2.dta, replace;
outsheet using ../tmp_data/ccp_primary_cid_ever_in_tornado_block_v2.csv, noquote comma replace;
count;

* there are 175,272 cid's that ever lived in a tornado or buffer block v2;
clear;
*/










* create baseline sample with 0.5-1.5 mile control group and sample with 1-2 mile control group;
* cg is the file name suffix of the control group and is appended to every file read or written below;
local controlgroups _05_15 _1_2;

foreach cg in `controlgroups' {;

* clear is given explicitly: on the second pass of the loop the data left in memory by the first;
* pass may have unsaved changes, in which case use without clear stops with an error;
use ../raw_data/ccp_ever_tornado_block_100pct_v2.dta, clear;

* one row per cid and qtr, then attach the bankruptcy extract;
* rows found only in the extract are not added to the panel;
sort cid qtr;
drop if cid==cid[_n-1] & qtr==qtr[_n-1];
merge 1:1 cid qtr using ../tmp_data/ccp_primary_cid_100pct_bankruptcy-7-12.dta;
drop if _merge==2;
drop _merge;

* same for the student loan balances;
sort cid qtr;
drop if cid==cid[_n-1] & qtr==qtr[_n-1];
merge 1:1 cid qtr using ../tmp_data/ccp_primary_cid_100pct_studentloan.dta;
drop if _merge==2;
drop _merge;

sort cid qtr;

* from here on the panel is keyed by the quarterly date qdate instead of qtr;
gen qdate = yq(year(qtr), quarter(qtr));
format qdate %tq;
drop qtr;

* new auto loans by opening quarter. Person-quarters without a match get zero loans and zero balance;
sort cid qdate;
merge m:1 cid qdate using ../tmp_data/ccp_new_auto_loan_data_near.dta;
drop if _merge == 2;
drop _merge;
replace balnewauto = 0 if balnewauto == .;
replace numautoopen = 0 if numautoopen == .;

* Merge in state fips codes;
* only rows whose state is found in the fips file are kept;

sort state;
merge m:1 state using ../raw_data/state_fips.dta;
tab _merge;
keep if _merge == 3;
drop _merge;

* drop observations missing a census tract since we cannot tell where they are living at that time;
drop if (census_tract == "000000" | census_tract == "");

* Current block of residence;
* two-digit zero-padded state code followed by the county, tract and block strings;
gen block_str = string(state_code, "%02.0f") + county_code + census_tract + census_block;

* save the 2000Q1 cross-section for comparison with Census 2000, then return to the full panel;
* this file does not depend on cg, so every pass of the loop writes the same file;
preserve;

keep if qdate == yq(2000, 1);
save ../tmp_data/ccp_for_census2000_comparison.dta, replace;

restore;


* Merge in tornado hits;
* the using file has qdate set to the quarter before the tornado;
sort block_str qdate;

* progress messages written to the log around the merge;
display "test `cg'";
display "../tmp_data/hit_and_buffer_blocks_within_hetall_for_first_merge_near`cg'";
display "test 1";

merge m:1 block_str qdate using ../tmp_data/hit_and_buffer_blocks_within_hetall_for_first_merge_near`cg';
display "test 2";
drop if _merge == 2;

* count a person as present for the tornado if they were there at the end of the quarter before the tornado;
gen pres_tornado = (_merge == 3);
drop _merge;


* Need to figure out how many times is the max to be hit by a tornado or in a buffer block;
* max_tornado is the number of person-quarters matched to a tornado or buffer block for each cid;
bys cid: egen max_tornado = sum(pres_tornado);
bys cid: gen obsnum = _n;
* treated is non-missing only on matched rows. Copy the minimum over each person to all of their rows;
egen tmp_treated = min(treated), by(cid);
drop treated;
rename tmp_treated treated;

* one row per person, before the sample restriction;
tab max_tornado treated if obsnum == 1;

* Drop people affected by multiple tornadoes for now;
* Drop people who were not present at the time of the tornado or who experienced more than 1 tornado;
* 449 people experienced 2 tornadoes and 27 experienced 3;
drop if (max_tornado == . | max_tornado == 0 | max_tornado > 1);

* one row per person, after the sample restriction;
tab max_tornado treated if obsnum == 1;

sum;

* spread block of residence in quarter of tornado to all observations of person and damcat;
* block_str_qhit starts out filled only on the row matched to a tornado;
gen block_str_qhit = block_str if id ~= .;

* numeric tornado attributes that are copied along with the block;
local numfill "weighted_intensity area_hit ef_group1 ef_group2 ef_group3 ef_group4 ef_group5";

* two passes over each person: first in ascending time order to fill later quarters,;
* then in descending time order to fill earlier quarters;
* replace works down the rows one at a time, so a filled value is carried on to the next empty row;
foreach direction in qdate -qdate {;
  gsort cid `direction';
  replace block_str_qhit = block_str_qhit[_n-1] if block_str_qhit=="" & block_str_qhit[_n-1]~="" & cid==cid[_n-1];
  foreach v of local numfill {;
    replace `v' = `v'[_n-1] if `v'==. & `v'[_n-1]~=. & cid==cid[_n-1];
  };
};

sum;


* spread tornado id to all observations of person;
bys cid: egen tmp_id = min(id);
replace id = tmp_id if id == .;
drop tmp_id;

* do all things based upon block of residence at the end of the quarter before the tornado;
* block_str temporarily holds the block at the time of the tornado. The current block is parked in tmp_block_str;
rename block_str tmp_block_str;
rename block_str_qhit block_str;

* merge in full tornado information;
* variables already in memory keep their values. Only variables new to the panel come from the using file;
sort id block_str;
merge m:1 id block_str using ../tmp_data/hit_and_buffer_blocks_within_hetall_near`cg';
drop if _merge == 2;
drop _merge;



* Merge in block group data regarding demographics of block group at time of tornado;
* merge in census block group demographic variables;
* the block group key is the first 12 characters of the block string;
gen bg_str = substr(block_str, 1, 12);
sort bg_str;
merge m:1 bg_str using ../raw_data/census2000_blck_grp.dta;
drop if _merge == 2;
drop _merge;
sort bg_str;
merge m:1 bg_str using ../raw_data/census2000_blck_grp_vars.dta;
drop if _merge == 2;
drop _merge;
drop bg_str;

* the block-level census file is keyed by block2000, so rename for the merge and rename back;
rename block_str block2000;
merge m:1 block2000 using ../raw_data/census2000_block_vars.dta;
drop if _merge == 2;
drop _merge;
rename block2000 block_str;

* restore the names: block_str is again the current block, block_str_qhit the block at the tornado;
rename block_str block_str_qhit;
rename tmp_block_str block_str;

* get the actual quarter of the tornado;
gen qtornado = yq(year(sdate), quarter(sdate));

count if qdate == .;
count if qtornado == .;

* the actual quarter of the tornado is when qsince = 0;
* this means the tornado happens between -1 and 0;
gen qsince = (qdate - qtornado);

* create ccp variables;
do variable_creation_num.do;
do variable_creation_gt1.do;
do variable_creation_bal.do;

compress;

* Put balances into real terms;
* quarters missing from the price index file stay in the panel with cpi missing;
sort qdate;
merge m:1 qdate using ../raw_data/cpi-r_allitems_urban_quarterly_sa.dta, keep(master match) nogenerate;


* Put into 2010Q1 dollars;
* index level used as the 2010Q1 base, kept as text so it expands to the same literal everywhere;
local base2010q1 217.34067;

foreach v of varlist bal_* balnewauto hic_* {;
  replace `v' = `v' * `base2010q1' / cpi;
};

* logs of one plus the real balance, so a zero balance maps to zero;
foreach debt in auto bcret bankc home consf other2 {;
  gen lbal_`debt'_tot = log(1+bal_`debt'_tot);
};
gen lbalnewauto = log(1+balnewauto);


* rescale the two census dollar variables from an index level of 170.1 to the same base;
* and rebuild their logs from the rescaled values;
drop lmedoohval lmedhhinc;
foreach v in medoohval medhhinc {;
  replace `v' = `v' * `base2010q1' / 170.1;
};
foreach v in medoohval medhhinc {;
  gen l`v' = log(`v');
};
compress;

save ../tmp_data/ccp_tornado_sample_not_balanced_within_hetall_near`cg', replace;









* Create the baseline sample for the current control group;


local buffer = "hetall";

use ../tmp_data/ccp_tornado_sample_not_balanced_within_`buffer'_near`cg', clear;

* ttype dummies: "1 = majority hit";
* ttype1 copies treated, except that it is missing where area_hit is below 0.5;
* rows with a missing area_hit keep their treated value;
gen ttype1 = treated if !(area_hit < 0.5);

tab ttype1;

* save sample ttype 1;
drop if ttype1 == .;
save ../tmp_data/ccp_tornado_sample_not_balanced_within_`buffer'_ttype1_near`cg', replace;



local buffer = "hetall";
local numq = "12";
local tt = "1";


use ../tmp_data/ccp_tornado_sample_not_balanced_within_`buffer'_ttype`tt'_near`cg', clear;

* drop dead people;
drop if dead == 1;

* one row per person and quarter;
sort cid qdate;
drop if cid==cid[_n-1] & qdate == qdate[_n-1];

* tract, county and state identifiers are prefixes of the block string;
gen tract_str = substr(block_str, 1, 11);
gen county_str = substr(block_str, 1, 5);
gen state_str = substr(block_str, 1, 2);

* create moving variables before limiting sample to 12 quarters before tornado;
* A move is a change of location relative to the previous observation of the SAME person;
* The comparison runs under by cid so that the first row of a person is never compared;
* with the last row of the person sorted before them. On that first row there is no;
* earlier location to compare with, so the indicator is missing there;
sort cid qdate;
foreach geo in block tract county state {;
  by cid: gen move`geo' = (`geo'_str ~= `geo'_str[_n-1]) if _n > 1;
};

* create moving variables before limiting sample to 12 quarters before tornado;
* conditional on not moving back in the next 3 years;
* A move is set back to 0 when any of the next 12 observations of the same person is at;
* the location held before the move. The look-ahead stops at the last row of the person;
* and never reads rows that belong to the next person;
foreach geo in block tract county state {;
  by cid: gen move`geo'3yr = (`geo'_str ~= `geo'_str[_n-1]) if _n > 1;
  forvalues k = 1/12 {;
    by cid: replace move`geo'3yr = 0 if _n > 1 & _n + `k' <= _N & `geo'_str[_n+`k'] == `geo'_str[_n-1];
  };
};

* drop any quarters with missing account data;
* do this only as a robustness check;
*drop if num_all_tot == .;

* first keep only people continuously in the sample (no missing quarters);

* restrict to the window of numq quarters either side of the tornado quarter;
* rows with a missing qsince are dropped as well, since missing ranks above every number;
drop if (qsince < -`numq' | qsince > `numq');

* remake obsnum;
* numobs is the number of rows a person has inside the window;
drop obsnum;
by cid: gen numobs = _N;
by cid: gen obsnum = _n;

egen maxqsince = max(qsince), by(cid);
egen minqsince = min(qsince), by(cid);

* need to be in sample at least since -`numq'and at least to +`numq';
keep if (maxqsince >= `numq' & minqsince <= -`numq');

* number of quarters spanned by the first and last row of a person;
gen qinsample = maxqsince - minqsince + 1;

tab qinsample if obsnum == 1;
tab numobs if obsnum == 1;
count if qinsample > numobs & obsnum ==1;

count if qinsample > numobs;

* drop if missing any quarters in pre/post period;
* a person with fewer rows than quarters spanned has a gap somewhere in the window;
drop if qinsample > numobs;

* save balanced panel;
compress;
save ../tmp_data/ccp_tornado_sample_not_balanced_within_`buffer'_ttype`tt'_bal`numq'prepost_near`cg', replace;









* Settings that identify the panel file to load;
local buffer = "hetall";
local numq = "12";
local tt = "1";

use ../tmp_data/ccp_tornado_sample_not_balanced_within_`buffer'_ttype`tt'_bal`numq'prepost_near`cg', clear;

* Age in the calendar year of the observation. birthyear 0 is left as missing age;
gen year = year(dofq(qdate));
gen age = year - birthyear if birthyear ~= 0;

* Available credit (limit minus balance, floored at 0) and utilization;
* (balance over limit, capped at 1) for bcret accounts in quarters -12 to -1;
gen tmp_avail_bcret_tot = hic_bcret_tot - bal_bcret_tot if (qsince<=-1 & qsince>=-12);
replace tmp_avail_bcret_tot = 0 if (bal_bcret_tot >= hic_bcret_tot & bal_bcret_tot~=. & (qsince<=-1 & qsince>=-12));
gen tmp_util_bcret_tot = bal_bcret_tot/hic_bcret_tot if (qsince<=-1 & qsince>=-12);
replace tmp_util_bcret_tot = 1 if (bal_bcret_tot >= hic_bcret_tot & bal_bcret_tot~=. & (qsince<=-1 & qsince>=-12));

* Person-level means. The two credit measures only exist in the pre-period so;
* their means are pre-period means;
* NOTE(review): riskscore is averaged over every quarter in the file, not only;
* the pre-period. Confirm that this is intended;
egen m_tmp_avail_bcret_tot = mean(tmp_avail_bcret_tot), by(cid);
egen m_tmp_util_bcret_tot = mean(tmp_util_bcret_tot), by(cid);
egen m_riskscore = mean(riskscore), by(cid);

* Tercile groups are assigned on the qsince==-1 row using the 33rd and 67th;
* percentiles. Stata treats missing as larger than any number, so each >= test;
* also requires a nonmissing value. Without that guard a person with no;
* pre-period value would be coded into the high group;
_pctile m_tmp_avail_bcret_tot if m_tmp_avail_bcret_tot~=. & qsince==-1, nq(100);
disp("low available credit: `r(r33)'");
gen tmp_lac = 0 if m_tmp_avail_bcret_tot ~= . & qsince==-1;
replace tmp_lac = 1 if m_tmp_avail_bcret_tot <= `r(r33)' & qsince==-1;
disp("high available credit: `r(r67)'");
gen tmp_hac = 0 if m_tmp_avail_bcret_tot ~= . & qsince==-1;
replace tmp_hac = 1 if m_tmp_avail_bcret_tot >= `r(r67)' & m_tmp_avail_bcret_tot ~= . & qsince==-1;

_pctile m_tmp_util_bcret_tot if m_tmp_util_bcret_tot~=. & qsince==-1, nq(100);
disp("low credit utilization: `r(r33)'");
gen tmp_luc = 0 if m_tmp_util_bcret_tot ~= . & qsince==-1;
replace tmp_luc = 1 if m_tmp_util_bcret_tot <= `r(r33)' & qsince==-1;
disp("high credit utilization: `r(r67)'");
gen tmp_huc = 0 if m_tmp_util_bcret_tot ~= . & qsince==-1;
replace tmp_huc = 1 if m_tmp_util_bcret_tot >= `r(r67)' & m_tmp_util_bcret_tot ~= . & qsince==-1;

_pctile age if age ~= . & qsince==-1, nq(100);
disp("young age: `r(r33)'");
gen tmp_young = 0 if age ~= . & qsince==-1;
replace tmp_young = 1 if age <= `r(r33)' & qsince==-1;
disp("old age: `r(r67)'");
gen tmp_old = 0 if age ~= . & qsince==-1;
replace tmp_old = 1 if age >= `r(r67)' & age~=. & qsince==-1;

_pctile m_riskscore if m_riskscore ~= . & qsince==-1, nq(100);
disp("lrs risk score: `r(r33)'");
gen tmp_lrs = 0 if m_riskscore ~= . & qsince==-1;
replace tmp_lrs = 1 if m_riskscore <= `r(r33)' & m_riskscore ~= . & qsince==-1;
disp("hrs risk score: `r(r67)'");
gen tmp_hrs = 0 if m_riskscore ~= . & qsince==-1;
replace tmp_hrs = 1 if m_riskscore >= `r(r67)' & m_riskscore ~= . & qsince==-1;

* Copy the qsince==-1 classification to every row of the person. The tmp;
* flags are nonmissing only on that row, so min simply picks that value up;
egen lac = min(tmp_lac), by(cid);
egen hac = min(tmp_hac), by(cid);
egen luc = min(tmp_luc), by(cid);
egen huc = min(tmp_huc), by(cid);
egen young = min(tmp_young), by(cid);
egen old = min(tmp_old), by(cid);
egen lrs = min(tmp_lrs), by(cid);
egen hrs = min(tmp_hrs), by(cid);
drop m_riskscore tmp_avail_bcret_tot tmp_util_bcret_tot m_tmp_avail_bcret_tot m_tmp_util_bcret_tot tmp_lac tmp_hac tmp_luc tmp_huc tmp_young tmp_old tmp_lrs tmp_hrs;


* Keep a single record per person-quarter;
sort cid qdate;
drop if cid==cid[_n-1] & qdate == qdate[_n-1];

count if qsince == .;

* Winsorize every balance (bal_*) and credit limit (hic_*) variable at its;
* quarter-specific 99th percentile. Quarters 157 to 227 correspond to 1999q2;
* through 2016q4 on the quarterly date scale. capture suppresses any error;
* raised by sum or replace for a given variable and quarter;
sum qdate, d;
foreach wvar of varlist bal_* hic_* {;
  forvalues q = 157/227 {;
    cap sum `wvar' if qdate==`q', d;
    cap replace `wvar' = `r(p99)' if `wvar'>`r(p99)' & `wvar' ~= . & qdate==`q';
  };
};
                  
rename individual_assistance ia;

* post covers quarters 0 to 12. pre covers quarters -12 to -2, which leaves;
* quarter -1 as the omitted period;
gen post = (qsince >=0 & qsince <= 12);
gen pre = (qsince >=-12 & qsince <= -2);

gen post_t = treated * post;
gen pre_t = treated * pre;

gen post_ia = ia * post;
gen pre_ia = ia * pre;

* NOTE(review): treat is not created in the visible code. It presumably;
* resolves to treated through variable-name abbreviation. Confirm;
gen post_ia_t = treat * ia * post;
gen pre_ia_t = treat * ia * pre;

* Geography of the block lived in at the time of the tornado;
gen tract_str_qhit = substr(block_str_qhit, 1, 11);
* Rebuild county_str from the current block right after dropping it. If it is;
* only dropped, the county_str reference below resolves by abbreviation to;
* county_str_qhit and moveoutcounty is 0 for everyone;
cap drop county_str;
gen county_str = substr(block_str, 1, 5);
gen county_str_qhit = substr(block_str_qhit, 1, 5);
gen state_str_qhit = substr(block_str_qhit, 1, 2);

* moveout indicators: current residence differs from the tornado-time one;
gen moveoutblock = (block_str ~= block_str_qhit);
gen moveouttract = (tract_str ~= tract_str_qhit);
gen moveoutcounty = (county_str ~= county_str_qhit);
gen moveoutstate = (state_str ~= state_str_qhit);

                  
* Declare the panel on a numeric person id so that lag and lead operators work;
cap drop ncid;
egen ncid = group(cid);
tsset ncid qdate, quarterly;

* Foreclosure start: flagged this quarter and not flagged in the prior quarter;
gen fstart = 0;
replace fstart = 1 if l1.fforeclosure == 0 & fforeclosure == 1;

* movedaftertornado: on the qsince==0 row, check for residence outside the;
* tornado-time block in that quarter or in any of the next 12 quarters, then;
* copy the result to all rows of the person;
gen tmp_movedaftertornado = 0 if qsince==0;
replace tmp_movedaftertornado = 1 if qsince==0 & moveoutblock==1;
forvalues k = 1/12 {;
  replace tmp_movedaftertornado = 1 if qsince==0 & f`k'.moveoutblock==1;
};
egen movedaftertornado = max(tmp_movedaftertornado), by(cid);
drop tmp_movedaftertornado;

* Account indicators as of qsince==-1, copied to all rows of the person;
gen tmp_had_cc = gt1_bcret_tot if qsince == -1;
egen had_cc = min(tmp_had_cc), by(cid);
drop tmp_had_cc;

foreach a in home auto {;
  gen tmp_had_`a' = gt1_`a'_tot if qsince == -1;
  egen had_`a' = min(tmp_had_`a'), by(cid);
  drop tmp_had_`a';
};

* Combined balance measures. bal_newtotal_tot and bal_newall_tot are built;
* from the same formula;
gen bal_newother_tot = bal_consf_tot + bal_retal_tot + bal_other_tot;
foreach agg in newtotal newall {;
  gen bal_`agg'_tot = bal_newother_tot + bal_bankc_tot + bal_auto_tot + bal_home_tot;
};

* havehome: 0 with no home loan, 1 with one or more, missing otherwise;
gen havehome = 0 if num_home_tot == 0;
replace havehome = 1 if (num_home_tot ~=. & num_home_tot >= 1);

* alwayshadhome: starts as 1 on the qsince==0 row and is cleared as soon as;
* any of the 12 preceding quarters lacks havehome==1;
tsset ncid qdate, quarterly;
gen alwayshadhome = 1 if qsince==0;
forvalues k = 1/12 {;
  replace alwayshadhome = . if l`k'.havehome ~= 1;
};
* Sorting within person puts a 1 ahead of missing, so the first row carries;
* the flag whenever it was set;
bys cid (alwayshadhome): replace alwayshadhome = alwayshadhome[1];

* Home-loan outcomes conditional on a home loan in all 12 quarters before the;
* tornado. fstart is generated without missing values earlier in this script;
foreach v in bal_home_tot bal_fmrtg_tot bal_hel_tot bal_helin_tot bal_helre_tot gt1_home_tot {;
  gen `v'_cond = `v' if alwayshadhome==1 & fstart~=.;
};
foreach v in fstart fforeclosure {;
  gen `v'_cond = `v' if alwayshadhome==1 & bal_home_tot~=.;
};
drop ncid;
                  
* Subgroup-specific copies of each outcome listed in cavars. The copy keeps;
* the outcome for members of the subgroup and is missing for everyone else;
foreach y in `cavars' {;
  gen `y'_you = `y' if young == 1;
  foreach grp in old lrs hrs lac hac luc huc {;
    gen `y'_`grp' = `y' if `grp' == 1;
  };
  gen `y'_mat = `y' if movedaftertornado == 1;
  gen `y'_sat = `y' if movedaftertornado == 0;
};


* Neighborhood characteristics interacted with the post indicator;
foreach x in lmedhhinc lmedoohval povrate foo fba f65p faa fhisp {;
  gen `x'_p = post * `x';
};



* Event-time indicators. qspK marks K quarters after the tornado quarter and;
* qsmK marks K quarters before it. Each one is also interacted with ia;
gen qsp0 = (qsince == 0);
gen ia_qsp0 = ia * qsp0;

forvalues k = 1/12 {;
  local m = -`k';
  gen qsp`k' = (qsince == `k');
  gen qsm`k' = (qsince == `m');
  gen ia_qsp`k' = ia * qsp`k';
  gen ia_qsm`k' = ia * qsm`k';
};


* The same indicators restricted to treated people;

gen qsp0_t = (treated==1 & qsince == 0);
gen ia_qsp0_t = (treated==1 & ia_qsp0==1);

forvalues k = 1/12 {;
  local m = -`k';
  gen qsp`k'_t = (treated==1 & qsince == `k');
  gen qsm`k'_t = (treated==1 & qsince == `m');
  foreach e in qsp`k' qsm`k' {;
    gen ia_`e'_t = (treated==1 & ia_`e'==1);
  };
};

* ia interacted with treatment and with the post period;
gen byte ia_t = ia * treated;
gen byte ia_post_t = ia * post * treated;
gen byte ia_post = ia * post;

* Quarter indicators;
xi i.qdate;

* Tornado intensity measures interacted with ia and with the pre and post;
* periods;
gen ia_wgtintens = weighted_intensity * ia;

foreach per in pre post {;
  gen `per'_wgtintens = `per' * weighted_intensity;
  forvalues g = 1/3 {;
    gen `per'_ef_group`g' = `per' * ef_group`g';
  };
};

foreach per in pre post {;
  gen `per'_ia_wgtintens = `per' * weighted_intensity * ia;
  forvalues g = 1/3 {;
    gen `per'_ia_ef_group`g' = `per' * ef_group`g' * ia;
  };
};

* Intensity measures by event quarter, with and without ia. Post quarters run;
* from 0 to 12;
forvalues q = 0/12 {;
  gen efwtqsp`q' = weighted_intensity * qsp`q';
  forvalues g = 1/3 {;
    gen efg`g'qsp`q' = ef_group`g' * qsp`q';
  };

  gen ia_efwtqsp`q' = weighted_intensity * ia *qsp`q';
  forvalues g = 1/3 {;
    gen ia_efg`g'qsp`q' = ef_group`g' * ia * qsp`q';
  };
};
* Pre quarters run from 2 to 12, so quarter -1 gets no interaction;
forvalues q = 2/12 {;
  gen efwtqsm`q' = weighted_intensity * qsm`q';
  forvalues g = 1/3 {;
    gen efg`g'qsm`q' = ef_group`g' * qsm`q';
  };

  gen ia_efwtqsm`q' = weighted_intensity * ia * qsm`q';
  forvalues g = 1/3 {;
    gen ia_efg`g'qsm`q' = ef_group`g' * ia * qsm`q';
  };
};

* Event-year indicators built from the quarterly ones. ys1palt also includes;
* the tornado quarter itself;
gen ys1p = (qsp1 | qsp2 | qsp3 | qsp4);
gen ys1palt = (qsp0 | qsp1 | qsp2 | qsp3 | qsp4);
gen ys2p = (qsp5 | qsp6 | qsp7 | qsp8);
gen ys3p = (qsp9 | qsp10 | qsp11 | qsp12);

gen ys2m = (qsm5 | qsm6 | qsm7 | qsm8);
gen ys3m = (qsm9 | qsm10 | qsm11 | qsm12);

* Short aliases used to build interaction names;
gen t = treated;
gen efwt = weighted_intensity;
forvalues g = 1/3 {;
  gen efg`g' = ef_group`g';
};

* Interact every event indicator with every alias. capture skips names that;
* already exist, such as qsp0_t;
local evt "qsp0 ys1p ys1palt ys2p ys3p ys2m ys3m";
foreach a in t ia efwt efg1 efg2 efg3 {;
  foreach e of local evt {;
    cap gen `e'_`a' = `e' * `a';
  };
};
foreach a in efwt efg1 efg2 efg3 {;
  foreach e of local evt {;
    cap gen `e'_`a'_ia = `e' * `a' * ia;
    cap gen `e'_`a'_ia_t = `e' * `a' * ia * t;
  };
};


compress;
save ../tmp_data/ccp_tornado_`buffer'_ttype`tt'_bal`numq'prepost_for_analysis_near`cg', replace;




* migration samples;


* Migration sample: a version of the near dataset that keeps people who left;
* the tornado or control area before the tornado. Control group is people 1-2;
* miles away;

use ../raw_data/ccp_ever_tornado_block_100pct_v2.dta;

* Add the bankruptcy and student loan files one after the other. Before each;
* merge the master is reduced to one row per person-quarter. Rows found only;
* in the using file are discarded;
foreach src in bankruptcy-7-12 studentloan {;
  sort cid qtr;
  drop if cid==cid[_n-1] & qtr==qtr[_n-1];
  merge 1:1 cid qtr using ../tmp_data/ccp_primary_cid_100pct_`src'.dta;
  drop if _merge==2;
  drop _merge;
};

sort cid qtr;

* Quarterly date built from qtr;
gen qdate = yq(year(qtr), quarter(qtr));
format qdate %tq;
drop qtr;

* New auto loan data. Person-quarters without a match get zero new balance;
* and zero new accounts;
sort cid qdate;
merge m:1 cid qdate using ../tmp_data/ccp_new_auto_loan_data_near.dta;
drop if _merge == 2;
drop _merge;
foreach nv in balnewauto numautoopen {;
  replace `nv' = 0 if `nv' == .;
};


* Attach state fips codes and keep only rows that matched;

sort state;
merge m:1 state using ../raw_data/state_fips.dta;
tab _merge;
keep if _merge == 3;
drop _merge;

* Without a census tract the place of residence is unknown, so drop the row;
drop if (census_tract == "" | census_tract == "000000");

* Current block of residence: 2-digit state code followed by the county,;
* tract and block strings;
gen block_str = string(state_code, "%02.0f") + county_code + census_tract + census_block;


* Attach tornado hits by block and quarter;
* The using file has qdate set to any quarter in the 25q panel;
sort block_str qdate;
merge m:1 block_str qdate using ../tmp_data/hit_and_buffer_blocks_within_hetall_for_first_merge_near_25q`cg'.dta;
drop if _merge == 2;

* A matched row means the person lived in a hit or buffer block that quarter;
gen pres_tornado = (_merge == 3);
drop _merge;




* Count, per person, the quarters matched to a hit or buffer block, and make;
* treated constant within person by taking its minimum;
bys cid: egen max_tornado = sum(pres_tornado);
bys cid: gen obsnum = _n;
egen tmp_treated = min(treated), by(cid);
drop treated;
rename tmp_treated treated;

tab max_tornado treated if obsnum == 1;

* Drop people never matched to a hit or buffer block;
drop if (max_tornado == 0 | max_tornado == .);

tab max_tornado treated if obsnum == 1;

sort cid qdate;

* Take the block of residence and the tornado measures from the matched rows;
* and carry them to every row of the person, first forward in time and then;
* backward;
gen block_str_qhit = block_str if id ~= . & pres_tornado==1;
foreach ord in "cid qdate" "cid -qdate" {;
  gsort `ord';
  replace block_str_qhit = block_str_qhit[_n-1] if block_str_qhit=="" & block_str_qhit[_n-1]~="" & cid==cid[_n-1];
  foreach v in weighted_intensity area_hit ef_group1 ef_group2 ef_group3 ef_group4 ef_group5 {;
    replace `v' = `v'[_n-1] if `v'==. & `v'[_n-1]~=. & cid==cid[_n-1];
  };
};



* Fill missing tornado ids with the smallest id seen for the person;
bys cid: egen tmp_id = min(id);
replace id = tmp_id if id == .;
drop tmp_id;

* The merges below key on the block lived in at the time of the tornado, so;
* that block temporarily takes the name block_str;
rename block_str tmp_block_str;
rename block_str_qhit block_str;

* Full tornado information by tornado id and block;
sort id block_str;
merge m:1 id block_str using ../tmp_data/hit_and_buffer_blocks_within_hetall_near`cg'.dta;
drop if _merge == 2;
drop _merge;

* Census block group files, keyed on the first 12 characters of the block;
gen bg_str = substr(block_str, 1, 12);
foreach bgfile in census2000_blck_grp census2000_blck_grp_vars {;
  sort bg_str;
  merge m:1 bg_str using ../raw_data/`bgfile'.dta;
  drop if _merge == 2;
  drop _merge;
};
drop bg_str;

* Census block variables, keyed on block2000;
rename block_str block2000;
merge m:1 block2000 using ../raw_data/census2000_block_vars.dta;
drop if _merge == 2;
drop _merge;

* Put the original names back: tornado-time block and current block;
rename block2000 block_str_qhit;
rename tmp_block_str block_str;

* Quarter of the tornado;
gen qtornado = yq(year(sdate), quarter(sdate));

count if qdate == .;
count if qtornado == .;

* qsince is 0 in the quarter of the tornado, so the tornado happens between;
* -1 and 0;
gen qsince = qdate - qtornado;


* Build the ccp count, indicator and balance variables from helper do-files;
foreach part in num gt1 bal {;
  do variable_creation_`part'.do;
};

compress;

* Attach the quarterly CPI series;
sort qdate;
merge m:1 qdate using ../raw_data/cpi-r_allitems_urban_quarterly_sa.dta;
drop if _merge == 2;
drop _merge;


* Convert balances and credit limits into 2010Q1 dollars;
foreach dv of varlist bal_* balnewauto hic_* {;
  replace `dv' = `dv' * 217.34067 / cpi;
};

* Logs of one plus the real balance;
foreach typ in auto bcret bankc home consf other2 {;
  gen lbal_`typ'_tot = log(1+bal_`typ'_tot);
};
gen lbalnewauto = log(1+balnewauto);


* Rebuild the logged census medians after rescaling the levels;
* NOTE(review): 170.1 is presumably the price index for the census base;
* period. Confirm;
drop lmedoohval lmedhhinc;
foreach cv in medoohval medhhinc {;
  replace `cv' = `cv' * 217.34067 / 170.1;
};
foreach cv in medoohval medhhinc {;
  gen l`cv' = log(`cv');
};
compress;

* ttype1 equals treated, except that rows with area_hit below 0.5 are set to;
* missing and then dropped;
gen ttype1 = treated if !(area_hit < 0.5);
drop if ttype1 == .;

* One row per person-quarter;
sort cid qdate;
drop if cid==cid[_n-1] & qdate == qdate[_n-1];

* Optional robustness step, currently disabled: drop quarters with missing;
* account data;
*drop if num_all_tot == .;

* Tract, county and state prefixes of the current block and of the block;
* lived in at the time of the tornado;
cap drop county_str;
local len_tract = 11;
local len_county = 5;
local len_state = 2;
foreach g in tract county state {;
  gen `g'_str = substr(block_str, 1, `len_`g'');
  gen `g'_str_qhit = substr(block_str_qhit, 1, `len_`g'');
};

* Moving variables are created before the sample is limited to the 12;
* quarters around the tornado. A move is a change relative to the prior row;
* NOTE(review): the prior and following rows are not required to belong to;
* the same cid, so rows at the edge of a person are compared with another;
* person. Confirm that this is intended;
sort cid qdate;
foreach g in block tract county state {;
  gen move`g' = (`g'_str ~= `g'_str[_n-1]);
};

* Same moves, but not counted when any of the next 12 rows is back at the;
* location of the prior row;
sort cid qdate;
foreach g in block tract county state {;
  gen move`g'3yr = (`g'_str ~= `g'_str[_n-1]);
  forvalues k = 1/12 {;
    replace move`g'3yr = 0 if `g'_str[_n+`k'] == `g'_str[_n-1];
  };
};

* Keep the event window of numq quarters on each side of the tornado;
local numq = 12;
keep if inrange(qsince, -`numq', `numq');

* Rebuild the within-person row counters after dropping rows;
drop obsnum;
by cid: gen numobs = _N;
by cid: gen obsnum = _n;

egen maxqsince = max(qsince), by(cid);
egen minqsince = min(qsince), by(cid);

* A person must be observed at both edges of the window;
keep if (minqsince <= -`numq' & maxqsince >= `numq');

* Quarters spanned versus rows present, tabulated once per person;
gen qinsample = maxqsince - minqsince + 1;

tab qinsample if obsnum == 1;
tab numobs if obsnum == 1;
count if qinsample > numobs & obsnum ==1;

count if qinsample > numobs;

* A span longer than the row count means a quarter is missing;
drop if qinsample > numobs;

compress;
rename individual_assistance ia;

* post covers quarters 0 to 12 and pre covers quarters -12 to -2;
gen post = (qsince >=0 & qsince <= 12);
gen pre = (qsince >=-12 & qsince <= -2);

foreach per in post pre {;
  gen `per'_t = treated * `per';
};
foreach per in post pre {;
  gen `per'_ia = ia * `per';
};
* NOTE(review): treat presumably resolves to treated by abbreviation. Confirm;
foreach per in post pre {;
  gen `per'_ia_t = treat * ia * `per';
};

* moveout indicators: current residence differs from the tornado-time one;
foreach g in block tract county state {;
  gen moveout`g' = (`g'_str ~= `g'_str_qhit);
};

* Share already living elsewhere in the quarter before the tornado;
foreach g in block tract county state {;
  table ia treated if qsince==-1, c(mean moveout`g');
};




* Age in the calendar year of the observation. birthyear 0 is left as missing age;
gen year = year(dofq(qdate));
gen age = year - birthyear if birthyear ~= 0;


* Available credit (limit minus balance, floored at 0) and utilization;
* (balance over limit, capped at 1) for bcret accounts in quarters -12 to -1;
gen tmp_avail_bcret_tot = hic_bcret_tot - bal_bcret_tot if (qsince<=-1 & qsince>=-12);
replace tmp_avail_bcret_tot = 0 if (bal_bcret_tot >= hic_bcret_tot & bal_bcret_tot~=. & (qsince<=-1 & qsince>=-12));
gen tmp_util_bcret_tot = bal_bcret_tot/hic_bcret_tot if (qsince<=-1 & qsince>=-12);
replace tmp_util_bcret_tot = 1 if (bal_bcret_tot >= hic_bcret_tot & bal_bcret_tot~=. & (qsince<=-1 & qsince>=-12));

* Person-level means. The two credit measures only exist in the pre-period so;
* their means are pre-period means;
* NOTE(review): riskscore is averaged over every quarter in the file, not only;
* the pre-period. Confirm that this is intended;
egen m_tmp_avail_bcret_tot = mean(tmp_avail_bcret_tot), by(cid);
egen m_tmp_util_bcret_tot = mean(tmp_util_bcret_tot), by(cid);
egen m_riskscore = mean(riskscore), by(cid);

* Tercile groups are assigned on the qsince==-1 row using the 33rd and 67th;
* percentiles. Stata treats missing as larger than any number, so each >= test;
* also requires a nonmissing value. Without that guard a person with no;
* pre-period value would be coded into the high group;
_pctile m_tmp_avail_bcret_tot if m_tmp_avail_bcret_tot~=. & qsince==-1, nq(100);
disp("low available credit: `r(r33)'");
gen tmp_lac = 0 if m_tmp_avail_bcret_tot ~= . & qsince==-1;
replace tmp_lac = 1 if m_tmp_avail_bcret_tot <= `r(r33)' & qsince==-1;
disp("high available credit: `r(r67)'");
gen tmp_hac = 0 if m_tmp_avail_bcret_tot ~= . & qsince==-1;
replace tmp_hac = 1 if m_tmp_avail_bcret_tot >= `r(r67)' & m_tmp_avail_bcret_tot ~= . & qsince==-1;

_pctile m_tmp_util_bcret_tot if m_tmp_util_bcret_tot~=. & qsince==-1, nq(100);
disp("low credit utilization: `r(r33)'");
gen tmp_luc = 0 if m_tmp_util_bcret_tot ~= . & qsince==-1;
replace tmp_luc = 1 if m_tmp_util_bcret_tot <= `r(r33)' & qsince==-1;
disp("high credit utilization: `r(r67)'");
gen tmp_huc = 0 if m_tmp_util_bcret_tot ~= . & qsince==-1;
replace tmp_huc = 1 if m_tmp_util_bcret_tot >= `r(r67)' & m_tmp_util_bcret_tot ~= . & qsince==-1;

_pctile age if age ~= . & qsince==-1, nq(100);
disp("young age: `r(r33)'");
gen tmp_young = 0 if age ~= . & qsince==-1;
replace tmp_young = 1 if age <= `r(r33)' & qsince==-1;
disp("old age: `r(r67)'");
gen tmp_old = 0 if age ~= . & qsince==-1;
replace tmp_old = 1 if age >= `r(r67)' & age~=. & qsince==-1;

_pctile m_riskscore if m_riskscore ~= . & qsince==-1, nq(100);
disp("lrs risk score: `r(r33)'");
gen tmp_lrs = 0 if m_riskscore ~= . & qsince==-1;
replace tmp_lrs = 1 if m_riskscore <= `r(r33)' & m_riskscore ~= . & qsince==-1;
disp("hrs risk score: `r(r67)'");
gen tmp_hrs = 0 if m_riskscore ~= . & qsince==-1;
replace tmp_hrs = 1 if m_riskscore >= `r(r67)' & m_riskscore ~= . & qsince==-1;


* Copy the qsince==-1 classification to every row of the person. The tmp;
* flags are nonmissing only on that row, so min simply picks that value up;
egen lac = min(tmp_lac), by(cid);
egen hac = min(tmp_hac), by(cid);
egen luc = min(tmp_luc), by(cid);
egen huc = min(tmp_huc), by(cid);
egen young = min(tmp_young), by(cid);
egen old = min(tmp_old), by(cid);
egen lrs = min(tmp_lrs), by(cid);
egen hrs = min(tmp_hrs), by(cid);
drop m_riskscore tmp_avail_bcret_tot tmp_util_bcret_tot m_tmp_avail_bcret_tot m_tmp_util_bcret_tot tmp_lac tmp_hac tmp_luc tmp_huc tmp_young tmp_old tmp_lrs tmp_hrs;


* Subgroup-specific copies of each moving variable listed in movevars. The;
* mat and sat versions based on movedaftertornado are not created here;
foreach y in `movevars' {;
  gen `y'_you = `y' if young == 1;
  foreach grp in old lrs hrs lac hac luc huc {;
    gen `y'_`grp' = `y' if `grp' == 1;
  };
};

* Drop rows with unknown treatment status and give the control group zero in;
* every ef_group measure;
drop if treat == .;
forvalues g = 1/5 {;
  replace ef_group`g' = 0 if treated == 0;
};

* ia interacted with treatment and with the post period;

gen byte ia_t = ia * treated;
gen byte ia_post_t = ia * post * treated;
gen byte ia_post = ia * post;

* Quarter indicators;
xi i.qdate;

* Post-tornado event quarters 0 to 12: plain, treated, ia and intensity;
* versions;
forvalues q = 0/12 {;
  gen qsp`q' = (qsince == `q');
  gen qsp`q'_t = (treated==1 & qsince == `q');

  gen ia_qsp`q' = ia * qsp`q';

  gen ia_qsp`q'_t = (treated==1 & ia_qsp`q'==1);

  gen efwtqsp`q' = weighted_intensity * qsp`q';
  forvalues g = 1/3 {;
    gen efg`g'qsp`q' = ef_group`g' * qsp`q';
  };

  gen ia_efwtqsp`q' = weighted_intensity * ia * qsp`q';
  forvalues g = 1/3 {;
    gen ia_efg`g'qsp`q' = ef_group`g' * ia * qsp`q';
  };
};
* Pre-tornado event quarters 2 to 12, so quarter -1 gets no indicator;
forvalues q = 2/12 {;
  local m = -`q';
  gen qsm`q' = (qsince == `m');
  gen qsm`q'_t = (treated==1 & qsince == `m');

  gen ia_qsm`q' = ia * qsm`q';

  gen ia_qsm`q'_t = (treated==1 & ia_qsm`q'==1);

  gen efwtqsm`q' = weighted_intensity * qsm`q';
  forvalues g = 1/3 {;
    gen efg`g'qsm`q' = ef_group`g' * qsm`q';
  };

  gen ia_efwtqsm`q' = weighted_intensity * ia * qsm`q';
  forvalues g = 1/3 {;
    gen ia_efg`g'qsm`q' = ef_group`g' * ia * qsm`q';
  };
};
compress;

* Event-year indicators built from the quarterly ones. ys1palt also includes;
* the tornado quarter itself;
gen ys1p = (qsp1 | qsp2 | qsp3 | qsp4);
gen ys1palt = (qsp0 | qsp1 | qsp2 | qsp3 | qsp4);
gen ys2p = (qsp5 | qsp6 | qsp7 | qsp8);
gen ys3p = (qsp9 | qsp10 | qsp11 | qsp12);

gen ys2m = (qsm5 | qsm6 | qsm7 | qsm8);
gen ys3m = (qsm9 | qsm10 | qsm11 | qsm12);

* Short aliases used to build interaction names;
gen t = treated;
gen efwt = weighted_intensity;
forvalues g = 1/3 {;
  gen efg`g' = ef_group`g';
};

* Interact every event indicator with every alias. capture skips names that;
* already exist, such as qsp0_t;
local evt "qsp0 ys1p ys1palt ys2p ys3p ys2m ys3m";
foreach a in t ia efwt efg1 efg2 efg3 {;
  foreach e of local evt {;
    cap gen `e'_`a' = `e' * `a';
  };
};
foreach a in efwt efg1 efg2 efg3 {;
  foreach e of local evt {;
    cap gen `e'_`a'_ia = `e' * `a' * ia;
    cap gen `e'_`a'_ia_t = `e' * `a' * ia * t;
  };
};


* Intensity measures interacted with ia and with the pre and post periods;
gen ia_wgtintens = weighted_intensity * ia;

foreach per in pre post {;
  gen `per'_wgtintens = `per' * weighted_intensity;
  forvalues g = 1/3 {;
    gen `per'_ef_group`g' = `per' * ef_group`g';
  };
};

foreach per in pre post {;
  gen `per'_ia_wgtintens = `per' * weighted_intensity * ia;
  forvalues g = 1/3 {;
    gen `per'_ia_ef_group`g' = `per' * ef_group`g' * ia;
  };
};

compress;
save ../tmp_data/ccp_tornado_sample_not_balanced_within_hetall_near`cg'_migration.dta, replace;

* End of the version of the near dataset that keeps people who left the tornado or control area before the tornado for migration analysis: version where the control group is people 1-2 miles from the tornado;


};



***** Checking auto tradeline coverage;

* Build the list of distinct cids in the analysis file. The list is written;
* both as a dta file and as a csv file so that auto tradeline data can be;
* pulled for those people;
use ../tmp_data/ccp_tornado_hetall_ttype1_bal12prepost_for_analysis_near.dta, clear;
keep cid;
sort cid;
drop if cid == cid[_n-1];
count;
compress;
save ../tmp_data/cids_for_auto_tradeline.dta, replace;
outsheet using ../tmp_data/cids_for_auto_tradeline.csv, comma replace noquote;



* Build the set of person-quarters that have an auto tradeline with a;
* positive balance. It is used to check auto loan presence in the tradeline;
* data against the regular CCP;
* The file name is spelled tradeline, matching the name under which this raw;
* file is read at the top of the script;
use ../raw_data/ccp_auto_tradeline_near.dta, clear;
sort cid qtr tlid;

* Observations only in Q2 and Q4 through 2016;

keep if balance > 0 & balance ~=.;
gen qdate = yq(year(qtr), quarter(qtr));
format qdate %tq;

* One row per person-quarter;
keep cid qdate;
sort cid qdate;
drop if cid == cid[_n-1] & qdate==qdate[_n-1];
sort cid qdate;
save ../tmp_data/ccp_auto_tradeline_cid_qtrs.dta, replace;


* Compare the CCP auto loan indicator with presence in the tradeline data;
use ../tmp_data/ccp_tornado_hetall_ttype1_bal12prepost_for_analysis_near.dta, clear;

tab qdate;

* Period covered by the tradeline data: Q2 and Q4 of 2000 through 2016;
local overlap "year >= 2000 & year <= 2016 & (quarter(dofq(qdate)) == 2 | quarter(dofq(qdate)) == 4)";

sort cid qdate;
merge m:1 cid qdate using ../tmp_data/ccp_auto_tradeline_cid_qtrs.dta;
* Merge result within the overlapping period;
tab _merge if `overlap';
drop if _merge == 2;
gen autoloanintradeline = (_merge == 3);

* Coverage comparison, overall and within the overlapping period;
sum gt1_auto_tot autoloanintradeline, d;
tab gt1_auto_tot autoloanintradeline if `overlap';

clear;

*********************** end of checking auto tradeline coverage;



log close;
